/*

This program makes a square dataset of firms imports, exports, and MNE activity by country

 */
 
 



 **Set directories
cd ""  /* PROJECT ROOT FOLDER */

/* DEFINE GLOBALS PATHS HERE */

global data "data"
global input_orig "input"
global input "data"
global temp work
global output output



  
use $input/country_imp_aff2007.dta, clear  // this dataset is firms with some imports or MNE activity -


drop tariff_avg-drop  // not properly populated. 

*1. Prepare data for regressions
********************************************************
replace mne_status="FOR" if mne_status=="FO2"
replace for=0 if mne_status=="MNE"

tab type, miss
********************************************************


*2. Limit dataset to all firms that have some manuf MNE or trade activity
********************************************************
capture drop firm_importer firm_man_aff firm_exporter
foreach var in importer exporter man_aff for {
	bys firmid: egen firm_`var'=sum(`var')
	}
keep if firm_importer>0 | firm_man_aff>0 | firm_for>0 | firm_exporter>0


bys firmid: egen tot_import_countries=sum(importer)
bys firmid: egen tot_export_countries=sum(exporter)
tab tot_import_countries
tab tot_export_countries
********************************************************




*3. Make a square dataset for probabilities and regressions for each trade type
********************************************************
gen import=imp_value
gen export=exp_value 


keep firmid iso3 emp sales estabs naics4_primary import rp_imp_value rp_exp_value export number_man_affiliates man_aff_sales for pay_lbd

ren pay_lbd pay   

fillin firmid iso3 
  drop if iso3==""  

  
*Replace missing firm-country variable values
foreach vv in import rp_imp_value rp_exp_value export number_man_affiliates man_aff_sales for {
	replace `vv'=0 if `vv'==.
	}


*Replace firm-level variables that should apply throughout
destring naics4, gen(naics4_num)

foreach var in sales emp naics4_num estabs pay {
	bys firmid: egen t1=mean(`var')
	rename `var' orig_`var'
	gen `var'=t1
	drop t1
	}
 drop orig*
 drop naics4_prim _fillin
 
 

 
merge m:1 iso3 using "$data/country_data_unique_un.dta", keepusing(region SubregionName avg_wage_usd yr_sch gdp rd_stock no_firms dist comlang_ethno contig)
 tab iso if _merge==2
 drop if _merge==2
 drop _merge

*Make a human capital-adjusted wage 
gen avg_wage_usd_rel = avg_wage_usd / 3143  /*  This number is from data on Teresa's website  */
label var avg_wage_usd_rel "Average Nominal Monthly Wage in 2007 in 3143 USD (US wage)"
gen wage_hc_adj=avg_wage_usd_rel*exp(-.06*yr_sch)

*Get back the MNE status variable
merge m:1 firmid using "$input/firm_2007_withtypes.dta", keepusing(mne_status)
drop if _merge==2
drop _merge



*************************************************************


*5. Country level probabilities of activities
gen importers=1 if import>0
gen exporters=1 if export>0
gen assemblers=1 if man_aff_sales>0

foreach var in importers exporters assemblers {
	replace `var'=0 if `var'==.
	}
	
* browse if man_aff_sales==0 & number_man_affiliates>0

preserve
  keep if man_aff_sales==0 & number_man_affiliates>0
  tab iso
  tab firmid
  unique firmid
  keep firmid iso number_man_affiliates man_aff_sales
  save $data/temp_investigate_man_aff_sales.dta, replace
restore

*conditional on affiliates
gen importers_c_aff= (import>0 			& man_aff_sales>0 )
gen exporters_c_aff= (export>0 			& man_aff_sales>0 )

*conditional on imports
gen assemblers_c_imp= ( man_aff_sales>0 	& import>0 )
gen exporters_c_imp=  ( export>0 		& import>0 )

*conditional on exports
gen assemblers_c_exp= ( man_aff_sales>0 	& export>0 )
gen importers_c_exp=  ( import>0 		& export>0 )


**Define indicators for activities in region (but not country)
**************
bys firmid region: egen num_imp_countries_r = total(importers)
bys firmid region: egen num_exp_countries_r = total(exporters)
bys firmid region: egen num_maff_countries_r = total(assemblers)
bys firmid region: egen num_for_countries_r = total(for)  //  note that this can only be 1!


label variable num_imp_countries_r "Number of import countries in region"
label variable num_exp_countries_r "Number of export countries in region"
label variable num_maff_countries_r "Number of assembly countries in region"

*trade
foreach flow in imp exp {
	gen region_`flow'orters=1 if num_`flow'_countries_r - `flow'orters > 0
	  replace region_`flow'orters=0 if num_`flow'_countries_r - `flow'orters <= 0
	  replace region_`flow'orters=0 if `flow'orters==1		 // Region indicator only 1 for other countries in region
	  label variable region_`flow'orters "`flow'orter from region but not country"  
	  }
	   
	  
*fdi   
gen region_assemblers=1 if num_maff_countries_r - assemblers > 0
   replace region_assemblers=0 if num_maff_countries_r - assemblers <= 0
 *  replace region_assemblers=1 if region==21   // sample is firms with US manuf   
   replace region_assemblers=0 if assemblers==1    // Region indicator only 1 for other countries in region
   label variable region_assemblers "Affiliate in region but not country"   
   
*foreign-owned
gen region_for=1 if num_for_countries_r==1 & for~=1
   replace region_for=0 if region_for==.
   tab for, miss
   tab region_for, miss 
 label variable region_for "Owned by region but not country" 
  
  
*Indicators for conditional probabilites for firms with other activites in region
   
*conditional on affiliates in region
gen importers_c_raff=1 if import>0 		& region_assemblers==1
gen exporters_c_raff=1 if export>0 		& region_assemblers==1
gen assemblers_c_raff=1 if assemblers>0		&  num_maff_countries_r>1 // need to include firms that assemble in the country

*conditional on imports
gen assemblers_c_rimp=1 if man_aff_sales>0 	& region_importer==1
gen exporters_c_rimp=1 if export>0 		& region_importer==1
gen importers_c_rimp=1 if import>0 		& num_imp_countries_r>1  // need to include firms that import from the country

*conditional on exports
gen assemblers_c_rexp=1 if  man_aff_sales>0 	& region_exporter==1
gen importers_c_rexp=1 if import>0 		& region_exporter==1  
gen exporters_c_rexp=1 if export>0 		& num_exp_countries_r>1   // need to include firms that export to the country


*check for indicator and FOR status
bys firmid: egen tot_for=sum(for)

tab tot_for mne_status, miss

**Save firm-country square dataset 
************************************************************
save "$data/firm_country_matrix.dta", replace
************************************************************


    
